import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import sqlalchemy
def test():
    """Scrape question/answer pairs from the xcar.com.cn Q&A listing pages.

    Requests listing pages 1 through 3256 of
    ``https://www.xcar.com.cn/wenda/list/<page>`` and, for every ``<li>``
    inside the first ``div.qaList`` of each page, collects the text of its
    ``<h1>`` and ``<p>`` elements.

    Pages without a ``div.qaList`` and entries lacking an ``<h1>`` or ``<p>``
    are skipped instead of aborting the whole crawl.

    Returns:
        list[list[str]]: One ``[title, text]`` pair per listing entry, in
        page order. The rows can be loaded with
        ``pd.DataFrame(rows, columns=['Question', 'Answer'])``.

    Raises:
        requests.RequestException: If a request fails, times out, or the
            server answers with an HTTP error status.
    """
    HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip,deflate,sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8',
    }
    last_page = 3256
    pages_per_pause = 100
    pause_seconds = 8
    # Without a timeout a single stalled connection would block the crawl forever.
    request_timeout = 10

    rows = []
    for page in range(1, last_page + 1):
        # Pause after every 100 pages to go easy on the server; there is no
        # point in resting before the very first request.
        if page > 1 and (page - 1) % pages_per_pause == 0:
            print('休息8秒')
            time.sleep(pause_seconds)

        url = "https://www.xcar.com.cn/wenda/list/{0}".format(page)
        print(url)
        response = requests.get(url, headers=HEADERS, timeout=request_timeout)
        # Fail loudly on 4xx/5xx rather than parsing an error page as a listing.
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'lxml')
        qa_list = soup.find('div', class_='qaList')
        if qa_list is None:
            print('no qaList block found, skipping {0}'.format(url))
            continue

        for item in qa_list.find_all('li'):
            title_tag = item.find('h1')
            text_tag = item.find('p')
            if title_tag is None or text_tag is None:
                # Not a complete question/answer entry; nothing to record.
                continue
            rows.append([title_tag.get_text(), text_tag.get_text()])

    return rows


if __name__ == "__main__":
    # Running this file as a script only emits a blank line; the crawl stays
    # disabled until test() is invoked from here.
    print()








